In [1]:
from textblob import TextBlob
import nltk
from nltk.stem import WordNetLemmatizer
from wordcloud import WordCloud
import PIL.Image as image
In [2]:
import pandas as pd
import numpy as np
import os
import string
import matplotlib.pyplot as plt
In [3]:
# Aggregated review data across all 15 attractions (columns include "main").
top15 = pd.read_csv('top15.csv')
In [4]:
# Load the per-attraction review files 01.csv … 15.csv.
# Unpacking a generator replaces 15 copy-pasted read_csv lines while
# keeping the individual t1 … t15 names that later cells depend on.
t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15 = (
    pd.read_csv('{:02d}.csv'.format(i)) for i in range(1, 16)
)
In [5]:
# Word-frequency pipeline for the "main" (comment title) column of top15.
# Fixes vs. the original cell: the dead comment_arr / l / subjectivity_arr
# variables are removed; the corpus is joined in one pass instead of O(n^2)
# string concatenation; punctuation is left for the \w+ tokenizer to strip —
# pre-stripping it fused the last word of one comment with the first word of
# the next (the "." separators were themselves punctuation and got deleted).
text_corpus = ".".join(top15["main"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string: '\w' is an invalid escape otherwise
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set gives O(1) membership), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)
In [6]:
# Frequency plot of the 30 most common lemmatized words in all comment titles.
plt.figure(figsize=(10,6))
plt.figtext(.5,.9,'Top30 words in comment title of all tourists', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[6]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [7]:
# Drop purely numeric tokens before building the word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())
In [18]:
# Word cloud of all comment-title words, shaped by the Glasgow mask image.
plt.figure(figsize=(20, 16))
mask = np.array(image.open('glasgow.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.figtext(.5,.9,'Top200 words in comment title of tourists', fontsize=20, ha='center')
plt.axis('off')
plt.show()
In [19]:
# Word-frequency pipeline for t1["content"] (Kelvingrove Museum comments).
# Removes the dead comment_arr / l / subjectivity_arr variables, replaces
# O(n^2) string concatenation with str.join, and leaves punctuation for the
# \w+ tokenizer — pre-stripping fused words across comment boundaries.
text_corpus = ".".join(t1["content"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)
In [21]:
# Frequency plot of the 30 most common lemmatized words in these comments.
plt.figure(figsize=(10,6))
plt.figtext(.5,.9,'Top30 words in comment of Kelvingrove Museum', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[21]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [22]:
# Drop purely numeric tokens before building the word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())
In [26]:
# Word cloud of the comment words, shaped by the attraction's mask image.
plt.figure(figsize=(20, 16))
mask = np.array(image.open('01.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.figtext(.5,.9,'Top200 words in comment of Kelvingrove Museum', fontsize=20, ha='center')
plt.axis('off')
plt.show()
In [27]:
# Word-frequency pipeline for t1["main"] (Kelvingrove Museum comment titles).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t1["main"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment title of Kelvingrove Museum', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[27]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [28]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('01.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [29]:
# Word-frequency pipeline for t2["content"] (The Riverside Museum comments).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t2["content"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment of The Riverside Museum', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[29]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [31]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('02.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [32]:
# Word-frequency pipeline for t2["main"] (The Riverside Museum comment titles).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t2["main"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment title of The Riverside Museum', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[32]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [33]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('02.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [38]:
# Word-frequency pipeline for t3["content"] (Glengoyne Distillery comments).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t3["content"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment of Glengoyne Distillery', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[38]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [37]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('03.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [39]:
# Word-frequency pipeline for t3["main"] (Glengoyne Distillery comment titles).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t3["main"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment title of Glengoyne Distillery', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[39]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [40]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('03.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [41]:
# Word-frequency pipeline for t4["content"] (Celtic Park comments).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t4["content"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment of Celtic Park', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[41]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [43]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('04.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [44]:
# Word-frequency pipeline for t4["main"] (Celtic Park comment titles).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t4["main"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment title of Celtic Park', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[44]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [45]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('04.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [46]:
# Word-frequency pipeline for t5["content"] (University of Glasgow comments).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t5["content"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment of University of Glasgow', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[46]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [47]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('05.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [49]:
# Word-frequency pipeline for t5["main"] (University of Glasgow comment titles).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t5["main"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment title of University of Glasgow', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[49]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [50]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('05.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [51]:
# Word-frequency pipeline for t6["content"] (The Necropolis comments).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t6["content"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment of The Necropolis', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[51]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [52]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('06.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [53]:
# Word-frequency pipeline for t6["main"] (The Necropolis comment titles).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t6["main"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment title of The Necropolis', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[53]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [54]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('06.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [55]:
# Word-frequency pipeline for t7["content"] (Tennents Wellpark Brewery comments).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t7["content"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment of Tennents Wellpark Brewery', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[55]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [57]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('07.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [58]:
# Word-frequency pipeline for t7["main"] (Tennents Wellpark Brewery comment titles).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t7["main"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment title of Tennents Wellpark Brewery', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[58]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [59]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('07.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [60]:
# Word-frequency pipeline for t8["content"] (The Clydeside Distillery comments).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t8["content"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment of The Clydeside Distillery', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[60]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [61]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('08.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [62]:
# Word-frequency pipeline for t8["main"] (The Clydeside Distillery comment titles).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t8["main"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment title of The Clydeside Distillery', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[62]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [63]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('08.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [65]:
# Word-frequency pipeline for t9["content"] (Ibrox Stadium comments).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t9["content"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment of Ibrox Stadium', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[65]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [66]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('09.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [67]:
# Word-frequency pipeline for t9["main"] (Ibrox Stadium comment titles).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t9["main"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment title of Ibrox Stadium', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[67]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [68]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('09.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [69]:
# Word-frequency pipeline for t10["content"] (Glasgow Botanic Gardens comments).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t10["content"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment of Glasgow Botanic Gardens', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[69]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [70]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('10.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [71]:
# Word-frequency pipeline for t10["main"] (Glasgow Botanic Gardens comment titles).
# Removes dead comment_arr / l / subjectivity_arr, replaces O(n^2) string
# concatenation with str.join, and lets the \w+ tokenizer handle punctuation
# (pre-stripping fused words across comment boundaries).
text_corpus = ".".join(t10["main"])

tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')  # raw string for '\w'
text_tokens = tokenizer.tokenize(text_corpus)

# Lowercase, drop English stopwords (set: O(1) lookup), lemmatize.
stopwords = set(nltk.corpus.stopwords.words('english'))
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(word)
                   for word in (tok.lower() for tok in text_tokens)
                   if word not in stopwords]

freq_dist_text = nltk.FreqDist(lem_final_words)

# Frequency plot of the 30 most common words.
plt.figure(figsize=(10, 6))
plt.figtext(.5,.9,'Top30 words in comment title of Glasgow Botanic Gardens', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[71]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [72]:
# Drop purely numeric tokens, then render the mask-shaped word cloud.
res_text = ' '.join(word for word in lem_final_words if not word.isdigit())

plt.figure(figsize=(20, 16))
mask = np.array(image.open('10.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [73]:
# --- Word-frequency analysis of review bodies for Glasgow Science Centre (t11["content"]) ---
# NOTE(review): `comment_arr = t11.to_numpy()` was removed — it was dead code,
# immediately overwritten by the empty list below.
comment_arr = []        # reset here; presumably filled by a later sentiment cell — TODO confirm
subjectivity_arr = []   # reset here; presumably filled by a later sentiment cell — TODO confirm
l = len(t11)            # number of reviews

# Join every review into one corpus string; str.join is linear,
# unlike repeated "+" concatenation inside a loop.
text_corpus = ".".join(t11["content"])

# Strip punctuation before tokenizing.
text_nopunct = "".join(ch for ch in text_corpus if ch not in string.punctuation)

# Tokenize on word characters; the raw string avoids the
# invalid-escape-sequence warning that plain '\w+' triggers.
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
text_tokens = tokenizer.tokenize(text_nopunct)

# Lowercase all tokens.
text_words = [tok.lower() for tok in text_tokens]

# Drop English stopwords (set membership is O(1) vs O(k) per list scan).
stopwords = nltk.corpus.stopwords.words('english')
stopword_set = set(stopwords)
final_words = [w for w in text_words if w not in stopword_set]

# Lemmatize the remaining tokens.
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(w) for w in final_words]

# Frequency distribution and Top-30 plot.
freq_dist_text = nltk.FreqDist(lem_final_words)

plt.figure(figsize=(10,6))
plt.figtext(.5,.9,'Top30 words in comment of Glasgow Science Centre', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[73]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [74]:
# Build the cloud text from the lemmatized tokens, skipping pure digits.
res_text = ' '.join(w for w in lem_final_words if not w.isdigit())

plt.figure(figsize=(20,16))
# The attraction photo provides the silhouette mask for the cloud.
mask = np.array(image.open('11.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [75]:
# --- Word-frequency analysis of review titles for Glasgow Science Centre (t11["main"]) ---
# NOTE(review): `comment_arr = t11.to_numpy()` was removed — it was dead code,
# immediately overwritten by the empty list below.
comment_arr = []        # reset here; presumably filled by a later sentiment cell — TODO confirm
subjectivity_arr = []   # reset here; presumably filled by a later sentiment cell — TODO confirm
l = len(t11)            # number of reviews

# Join every review title into one corpus string; str.join is linear,
# unlike repeated "+" concatenation inside a loop.
text_corpus = ".".join(t11["main"])

# Strip punctuation before tokenizing.
text_nopunct = "".join(ch for ch in text_corpus if ch not in string.punctuation)

# Tokenize on word characters; the raw string avoids the
# invalid-escape-sequence warning that plain '\w+' triggers.
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
text_tokens = tokenizer.tokenize(text_nopunct)

# Lowercase all tokens.
text_words = [tok.lower() for tok in text_tokens]

# Drop English stopwords (set membership is O(1) vs O(k) per list scan).
stopwords = nltk.corpus.stopwords.words('english')
stopword_set = set(stopwords)
final_words = [w for w in text_words if w not in stopword_set]

# Lemmatize the remaining tokens.
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(w) for w in final_words]

# Frequency distribution and Top-30 plot.
freq_dist_text = nltk.FreqDist(lem_final_words)

plt.figure(figsize=(10,6))
plt.figtext(.5,.9,'Top30 words in comment title of Glasgow Science Centre', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[75]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [76]:
# Build the cloud text from the lemmatized tokens, skipping pure digits.
res_text = ' '.join(w for w in lem_final_words if not w.isdigit())

plt.figure(figsize=(20,16))
# The attraction photo provides the silhouette mask for the cloud.
mask = np.array(image.open('11.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [77]:
# --- Word-frequency analysis of review bodies for Glasgow Cathedral (t12["content"]) ---
# NOTE(review): `comment_arr = t12.to_numpy()` was removed — it was dead code,
# immediately overwritten by the empty list below.
comment_arr = []        # reset here; presumably filled by a later sentiment cell — TODO confirm
subjectivity_arr = []   # reset here; presumably filled by a later sentiment cell — TODO confirm
l = len(t12)            # number of reviews

# Join every review into one corpus string; str.join is linear,
# unlike repeated "+" concatenation inside a loop.
text_corpus = ".".join(t12["content"])

# Strip punctuation before tokenizing.
text_nopunct = "".join(ch for ch in text_corpus if ch not in string.punctuation)

# Tokenize on word characters; the raw string avoids the
# invalid-escape-sequence warning that plain '\w+' triggers.
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
text_tokens = tokenizer.tokenize(text_nopunct)

# Lowercase all tokens.
text_words = [tok.lower() for tok in text_tokens]

# Drop English stopwords (set membership is O(1) vs O(k) per list scan).
stopwords = nltk.corpus.stopwords.words('english')
stopword_set = set(stopwords)
final_words = [w for w in text_words if w not in stopword_set]

# Lemmatize the remaining tokens.
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(w) for w in final_words]

# Frequency distribution and Top-30 plot.
freq_dist_text = nltk.FreqDist(lem_final_words)

plt.figure(figsize=(10,6))
plt.figtext(.5,.9,'Top30 words in comment of Glasgow Cathedral', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[77]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [78]:
# Build the cloud text from the lemmatized tokens, skipping pure digits.
res_text = ' '.join(w for w in lem_final_words if not w.isdigit())

plt.figure(figsize=(20,16))
# The attraction photo provides the silhouette mask for the cloud.
mask = np.array(image.open('12.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [79]:
# --- Word-frequency analysis of review titles for Glasgow Cathedral (t12["main"]) ---
# NOTE(review): `comment_arr = t12.to_numpy()` was removed — it was dead code,
# immediately overwritten by the empty list below.
comment_arr = []        # reset here; presumably filled by a later sentiment cell — TODO confirm
subjectivity_arr = []   # reset here; presumably filled by a later sentiment cell — TODO confirm
l = len(t12)            # number of reviews

# Join every review title into one corpus string; str.join is linear,
# unlike repeated "+" concatenation inside a loop.
text_corpus = ".".join(t12["main"])

# Strip punctuation before tokenizing.
text_nopunct = "".join(ch for ch in text_corpus if ch not in string.punctuation)

# Tokenize on word characters; the raw string avoids the
# invalid-escape-sequence warning that plain '\w+' triggers.
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
text_tokens = tokenizer.tokenize(text_nopunct)

# Lowercase all tokens.
text_words = [tok.lower() for tok in text_tokens]

# Drop English stopwords (set membership is O(1) vs O(k) per list scan).
stopwords = nltk.corpus.stopwords.words('english')
stopword_set = set(stopwords)
final_words = [w for w in text_words if w not in stopword_set]

# Lemmatize the remaining tokens.
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(w) for w in final_words]

# Frequency distribution and Top-30 plot.
freq_dist_text = nltk.FreqDist(lem_final_words)

plt.figure(figsize=(10,6))
plt.figtext(.5,.9,'Top30 words in comment title of Glasgow Cathedral', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[79]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [81]:
# Build the cloud text from the lemmatized tokens, skipping pure digits.
res_text = ' '.join(w for w in lem_final_words if not w.isdigit())

plt.figure(figsize=(20,16))
# The attraction photo provides the silhouette mask for the cloud.
mask = np.array(image.open('12.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [82]:
# --- Word-frequency analysis of review bodies for Buchanan Street (t13["content"]) ---
# NOTE(review): `comment_arr = t13.to_numpy()` was removed — it was dead code,
# immediately overwritten by the empty list below.
comment_arr = []        # reset here; presumably filled by a later sentiment cell — TODO confirm
subjectivity_arr = []   # reset here; presumably filled by a later sentiment cell — TODO confirm
l = len(t13)            # number of reviews

# Join every review into one corpus string; str.join is linear,
# unlike repeated "+" concatenation inside a loop.
text_corpus = ".".join(t13["content"])

# Strip punctuation before tokenizing.
text_nopunct = "".join(ch for ch in text_corpus if ch not in string.punctuation)

# Tokenize on word characters; the raw string avoids the
# invalid-escape-sequence warning that plain '\w+' triggers.
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
text_tokens = tokenizer.tokenize(text_nopunct)

# Lowercase all tokens.
text_words = [tok.lower() for tok in text_tokens]

# Drop English stopwords (set membership is O(1) vs O(k) per list scan).
stopwords = nltk.corpus.stopwords.words('english')
stopword_set = set(stopwords)
final_words = [w for w in text_words if w not in stopword_set]

# Lemmatize the remaining tokens.
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(w) for w in final_words]

# Frequency distribution and Top-30 plot.
freq_dist_text = nltk.FreqDist(lem_final_words)

plt.figure(figsize=(10,6))
plt.figtext(.5,.9,'Top30 words in comment of Buchanan Street', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[82]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [84]:
# Build the cloud text from the lemmatized tokens, skipping pure digits.
res_text = ' '.join(w for w in lem_final_words if not w.isdigit())

plt.figure(figsize=(20,16))
# The attraction photo provides the silhouette mask for the cloud.
mask = np.array(image.open('13.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [85]:
# --- Word-frequency analysis of review titles for Buchanan Street (t13["main"]) ---
# NOTE(review): `comment_arr = t13.to_numpy()` was removed — it was dead code,
# immediately overwritten by the empty list below.
comment_arr = []        # reset here; presumably filled by a later sentiment cell — TODO confirm
subjectivity_arr = []   # reset here; presumably filled by a later sentiment cell — TODO confirm
l = len(t13)            # number of reviews

# Join every review title into one corpus string; str.join is linear,
# unlike repeated "+" concatenation inside a loop.
text_corpus = ".".join(t13["main"])

# Strip punctuation before tokenizing.
text_nopunct = "".join(ch for ch in text_corpus if ch not in string.punctuation)

# Tokenize on word characters; the raw string avoids the
# invalid-escape-sequence warning that plain '\w+' triggers.
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
text_tokens = tokenizer.tokenize(text_nopunct)

# Lowercase all tokens.
text_words = [tok.lower() for tok in text_tokens]

# Drop English stopwords (set membership is O(1) vs O(k) per list scan).
stopwords = nltk.corpus.stopwords.words('english')
stopword_set = set(stopwords)
final_words = [w for w in text_words if w not in stopword_set]

# Lemmatize the remaining tokens.
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(w) for w in final_words]

# Frequency distribution and Top-30 plot.
freq_dist_text = nltk.FreqDist(lem_final_words)

plt.figure(figsize=(10,6))
plt.figtext(.5,.9,'Top30 words in comment title of Buchanan Street', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[85]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [87]:
# Build the cloud text from the lemmatized tokens, skipping pure digits.
res_text = ' '.join(w for w in lem_final_words if not w.isdigit())

plt.figure(figsize=(20,16))
# The attraction photo provides the silhouette mask for the cloud.
mask = np.array(image.open('13.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [88]:
# --- Word-frequency analysis of review bodies for Pollok Country Park (t14["content"]) ---
# NOTE(review): `comment_arr = t14.to_numpy()` was removed — it was dead code,
# immediately overwritten by the empty list below.
comment_arr = []        # reset here; presumably filled by a later sentiment cell — TODO confirm
subjectivity_arr = []   # reset here; presumably filled by a later sentiment cell — TODO confirm
l = len(t14)            # number of reviews

# Join every review into one corpus string; str.join is linear,
# unlike repeated "+" concatenation inside a loop.
text_corpus = ".".join(t14["content"])

# Strip punctuation before tokenizing.
text_nopunct = "".join(ch for ch in text_corpus if ch not in string.punctuation)

# Tokenize on word characters; the raw string avoids the
# invalid-escape-sequence warning that plain '\w+' triggers.
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
text_tokens = tokenizer.tokenize(text_nopunct)

# Lowercase all tokens.
text_words = [tok.lower() for tok in text_tokens]

# Drop English stopwords (set membership is O(1) vs O(k) per list scan).
stopwords = nltk.corpus.stopwords.words('english')
stopword_set = set(stopwords)
final_words = [w for w in text_words if w not in stopword_set]

# Lemmatize the remaining tokens.
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(w) for w in final_words]

# Frequency distribution and Top-30 plot.
freq_dist_text = nltk.FreqDist(lem_final_words)

plt.figure(figsize=(10,6))
plt.figtext(.5,.9,'Top30 words in comment of Pollok Country Park', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[88]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [89]:
# Build the cloud text from the lemmatized tokens, skipping pure digits.
res_text = ' '.join(w for w in lem_final_words if not w.isdigit())

plt.figure(figsize=(20,16))
# The attraction photo provides the silhouette mask for the cloud.
mask = np.array(image.open('14.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [90]:
# --- Word-frequency analysis of review titles for Pollok Country Park (t14["main"]) ---
# NOTE(review): `comment_arr = t14.to_numpy()` was removed — it was dead code,
# immediately overwritten by the empty list below.
comment_arr = []        # reset here; presumably filled by a later sentiment cell — TODO confirm
subjectivity_arr = []   # reset here; presumably filled by a later sentiment cell — TODO confirm
l = len(t14)            # number of reviews

# Join every review title into one corpus string; str.join is linear,
# unlike repeated "+" concatenation inside a loop.
text_corpus = ".".join(t14["main"])

# Strip punctuation before tokenizing.
text_nopunct = "".join(ch for ch in text_corpus if ch not in string.punctuation)

# Tokenize on word characters; the raw string avoids the
# invalid-escape-sequence warning that plain '\w+' triggers.
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
text_tokens = tokenizer.tokenize(text_nopunct)

# Lowercase all tokens.
text_words = [tok.lower() for tok in text_tokens]

# Drop English stopwords (set membership is O(1) vs O(k) per list scan).
stopwords = nltk.corpus.stopwords.words('english')
stopword_set = set(stopwords)
final_words = [w for w in text_words if w not in stopword_set]

# Lemmatize the remaining tokens.
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(w) for w in final_words]

# Frequency distribution and Top-30 plot.
freq_dist_text = nltk.FreqDist(lem_final_words)

plt.figure(figsize=(10,6))
plt.figtext(.5,.9,'Top30 words in comment title of Pollok Country Park', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[90]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [91]:
# Build the cloud text from the lemmatized tokens, skipping pure digits.
res_text = ' '.join(w for w in lem_final_words if not w.isdigit())

plt.figure(figsize=(20,16))
# The attraction photo provides the silhouette mask for the cloud.
mask = np.array(image.open('14.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [97]:
# --- Word-frequency analysis of review bodies for The Tenement House (t15["content"]) ---
# NOTE(review): `comment_arr = t15.to_numpy()` was removed — it was dead code,
# immediately overwritten by the empty list below.
comment_arr = []        # reset here; presumably filled by a later sentiment cell — TODO confirm
subjectivity_arr = []   # reset here; presumably filled by a later sentiment cell — TODO confirm
l = len(t15)            # number of reviews

# Join every review into one corpus string; str.join is linear,
# unlike repeated "+" concatenation inside a loop.
text_corpus = ".".join(t15["content"])

# Strip punctuation before tokenizing.
text_nopunct = "".join(ch for ch in text_corpus if ch not in string.punctuation)

# Tokenize on word characters; the raw string avoids the
# invalid-escape-sequence warning that plain '\w+' triggers.
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
text_tokens = tokenizer.tokenize(text_nopunct)

# Lowercase all tokens.
text_words = [tok.lower() for tok in text_tokens]

# Drop English stopwords (set membership is O(1) vs O(k) per list scan).
stopwords = nltk.corpus.stopwords.words('english')
stopword_set = set(stopwords)
final_words = [w for w in text_words if w not in stopword_set]

# Lemmatize the remaining tokens.
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(w) for w in final_words]

# Frequency distribution and Top-30 plot.
freq_dist_text = nltk.FreqDist(lem_final_words)

plt.figure(figsize=(10,6))
plt.figtext(.5,.9,'Top30 words in comment of The Tenement House', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[97]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [98]:
# Build the cloud text from the lemmatized tokens, skipping pure digits.
res_text = ' '.join(w for w in lem_final_words if not w.isdigit())

plt.figure(figsize=(20,16))
# The attraction photo provides the silhouette mask for the cloud.
mask = np.array(image.open('15.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [99]:
# --- Word-frequency analysis of review titles for The Tenement House (t15["main"]) ---
# NOTE(review): `comment_arr = t15.to_numpy()` was removed — it was dead code,
# immediately overwritten by the empty list below.
comment_arr = []        # reset here; presumably filled by a later sentiment cell — TODO confirm
subjectivity_arr = []   # reset here; presumably filled by a later sentiment cell — TODO confirm
l = len(t15)            # number of reviews

# Join every review title into one corpus string; str.join is linear,
# unlike repeated "+" concatenation inside a loop.
text_corpus = ".".join(t15["main"])

# Strip punctuation before tokenizing.
text_nopunct = "".join(ch for ch in text_corpus if ch not in string.punctuation)

# Tokenize on word characters; the raw string avoids the
# invalid-escape-sequence warning that plain '\w+' triggers.
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
text_tokens = tokenizer.tokenize(text_nopunct)

# Lowercase all tokens.
text_words = [tok.lower() for tok in text_tokens]

# Drop English stopwords (set membership is O(1) vs O(k) per list scan).
stopwords = nltk.corpus.stopwords.words('english')
stopword_set = set(stopwords)
final_words = [w for w in text_words if w not in stopword_set]

# Lemmatize the remaining tokens.
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(w) for w in final_words]

# Frequency distribution and Top-30 plot.
freq_dist_text = nltk.FreqDist(lem_final_words)

plt.figure(figsize=(10,6))
plt.figtext(.5,.9,'Top30 words in comment title of The Tenement House', fontsize=20, ha='center')
freq_dist_text.plot(30)
Out[99]:
<AxesSubplot:xlabel='Samples', ylabel='Counts'>
In [100]:
# Build the cloud text from the lemmatized tokens, skipping pure digits.
res_text = ' '.join(w for w in lem_final_words if not w.isdigit())

plt.figure(figsize=(20,16))
# The attraction photo provides the silhouette mask for the cloud.
mask = np.array(image.open('15.jpg'))
wordcloud = WordCloud(
    mask=mask,
    background_color='white',
    max_words=300,
    max_font_size=400,
    width=2000,
    height=1666,
).generate(res_text)

plt.imshow(wordcloud)
plt.axis('off')
plt.show()